* Load the farm-level data set.
* Paste the correct path to "rowplanting_farmlevel.dta" into the -use- line below.
use "C:\Users\n12073\Dropbox\farmer selection files\Paper_yield\EDCC\Second resubmission 02.2018\data sharing\rowplanting_farmlevel.dta", clear

*****************************
** TABLE 2: SAMPLE NUMBERS **
*****************************
** see 'sample numbers'
* Every count below is a two-way tabulation of participants by random_FTC;
* only the sample restriction changes from step to step.

* 0) Baseline survey coverage
tabulate participants random_FTC if baseline == 1

* 1) Impact survey coverage
tabulate participants random_FTC

*** RANDOM SUB SAMPLE
* Observations with participants == 2 are removed here and stay removed
* for everything that follows in this file.
drop if participants == 2
tabulate participants random_FTC

* 2) Non-attrition in the self-reported sample
tabulate participants random_FTC if attrition_selfreported == 0

* 3) Self-reported coverage
tabulate participants random_FTC if complier == 1 & attrition_selfreported == 0

* 4) Crop-cut coverage (second line additionally requires attrition_cropcut == 0)
tabulate participants random_FTC if complier == 1 & baseline == 1
tabulate participants random_FTC if complier == 1 & baseline == 1 & attrition_cropcut == 0

* Fold code 2 of initialselected into code 1 before the analysis starts.
recode initialselected (2 = 1)
***************************
** TABLE 3: BALANCEDNESS **
***************************
** see 'balancedness'
* RANDOMIZATION in random sample
** household and experimental plot characteristics
*
* Each characteristic is regressed on initialselected with standard errors
* clustered on kebele. Every fit is stored and all of them are exported in
* one outreg2 call at the end of the section.
*
* Stored estimates are named with a running counter and collected in the
* local balance_models. The covariate list below has 38 entries, so the
* loop uses b1 to b38. The earlier hard-coded names b38, b39 and b40 for the
* three extra regressions made the teff_yield fit overwrite the stored
* inputs_delay fit, which silently dropped inputs_delay from the table.
local balance_vars head_age head_gender head_write head_literate head_education_primary ///
	hh_distance_ftc hh_size hh_agriassets_totalvalue_ln hh_assets_totalvalue_ln ///
	hh_landowned_ln hh_otherincome hh_loan hh_iqubmember AREA plot_location ///
	soil_slopy soil_quality_high plot_plow_number plot_weeding plot_expertvisit ///
	training_experts seeds_quncho seeds_freeDA seed_rate plot_organicinput ///
	plot_fertilizer plot_manure plot_manure_rel plot_urea plot_urea_rel plot_DAP ///
	plot_DAP_rel plot_herb plot_herb_rel plot_pest plot_disease plot_lateinput inputs_delay

local balance_models
local number 0
foreach var of varlist `balance_vars' {
	local number = `number' + 1
	reg `var' initialselected if random_FTC==1, cluster(kebele)
	estimates store b`number'
	local balance_models `balance_models' b`number'
}

* Yield and area are compared on the yield_both==1 sample.
foreach var of varlist teff_yield teff_area_ha {
	local number = `number' + 1
	reg `var' initialselected if yield_both==1, cluster(kebele)
	estimates store b`number'
	local balance_models `balance_models' b`number'
}

* NOTE(review): this regression carries no sample restriction, unlike the
* ones above, and the bootstrap prefix has no cluster() option of its own.
* Both are kept exactly as they were; confirm that this is intended.
local number = `number' + 1
bootstrap ,reps(400) seed(123): reg ci initialselected, cluster(kebele)
estimates store b`number'
local balance_models `balance_models' b`number'

* One column per stored model, in the order they were estimated.
outreg2 [`balance_models'] using myfile, auto(2) excel replace tstat aster(tstat) noparen
** R1 asks for mean and sd of BC and RP farmers separately
* Two passes, first initialselected==0 and then initialselected==1. Each pass
* works on a temporary copy of the random_FTC==1 sample (preserve/restore)
* and exports mean and sd of the listed variables with outreg2.
* Both passes write to the same file with the replace option, so the file
* on disk holds the output of the second pass once this section has run.
local sumvars head_age head_gender head_write head_literate head_education_primary ///
	hh_distance_ftc hh_size hh_agriassets_totalvalue_ln hh_assets_totalvalue_ln ///
	hh_landowned_ln hh_otherincome hh_loan hh_iqubmember AREA plot_location ///
	soil_slopy soil_quality_high plot_plow_number plot_weeding plot_expertvisit ///
	training_experts seeds_quncho seeds_freeDA seed_rate plot_organicinput ///
	plot_fertilizer plot_manure plot_manure_rel plot_urea plot_urea_rel plot_DAP ///
	plot_DAP_rel plot_herb plot_herb_rel plot_pest plot_disease plot_lateinput ///
	inputs_delay teff_yield teff_area_ha ci

forvalues grp = 0/1 {
	preserve
	keep if random_FTC==1
	* Keep only the group indicator and the summary variables, and put the
	* summary variables first so that the indicator ends up last.
	keep initialselected `sumvars'
	order `sumvars'
	keep if initialselected==`grp'
	set matsize 4000
	** application dummies
	outreg2 using myfile, sum(detail) excel replace  eqkeep(mean sd)  side label auto(2) keep(`sumvars')
	restore
}

* Divide each listed variable by 100, in place.
* The step is not idempotent: running it again divides by 100 a second time.
local scaled_vars head_gender head_write head_literate head_education_primary ///
	hh_otherincome hh_iqubmember hh_loan soil_slopy soil_quality_high ///
	seeds_quncho seeds_freeDA plot_organicinput plot_fertilizer plot_manure ///
	plot_urea plot_DAP plot_herb plot_pest plot_disease plot_lateinput yield_both
foreach v of varlist `scaled_vars' {
	replace `v' = `v'/100
}

***************************
** TABLE 4: ITT and LATE **
***************************
*** control for oversampling
* A probit of initialselected on household covariates is fitted on the
* random_FTC==1 sample. The reciprocal of its prediction is stored as
* weight_ps and used as the weight in the regressions of this table.
local ps_covars head_age head_write head_gender head_cropprod hh_size ///
	hh_otherincome hh_iqubmember hh_loan hh_mobilephone ///
	hh_assets_totalvalue_ln hh_agriassets_totalvalue_ln
probit initialselected `ps_covars' if random_FTC==1, cluster(kebele)
predict ps
generate weight_ps = 1/ps

* Columns 1 and 2: each yield measure regressed on initialselected.
local col 0
foreach y in YIELD_actual_cc YIELD_actual_is {
	local col = `col' + 1
	regress `y' initialselected [aw=weight_ps] if random_FTC==1, cluster(kebele)
	estimates store b`col'
}
* Column 3: 2SLS with participants instrumented by initialselected.
* The weight type is not spelled out on this line ([w=...]), so the
* default weight type of the command applies.
ivregress 2sls YIELD_actual_is (participants = initialselected) [w=weight_ps] if random_FTC==1, cluster(kebele)
estimates store b3
outreg2 [b1 b2 b3] using myfile, auto(3) bdec(3) sdec(3) excel replace stats(coef se ci) paren(se) bracket(ci) sortvar(participants initialselected) nodepvar noaster

**** In the paper we say that the results are robust to the wild bootstrapping method.
**** The commented-out commands below show how to calculate it.
/*  cgmwildboot YIELD_actual_cc initialselected if random_FTC==1, cluster(kebele) bootcluster(kebele) seed(123) reps(400)
estimates store b4
cgmwildboot YIELD_actual_is initialselected if random_FTC==1, cluster(kebele) bootcluster(kebele) seed(123) reps(400)
estimates store b5
outreg2 [ b4 b5 b3] using myfile, auto(3)  bdec(3) sdec(3) excel replace stats(coef se ci) paren(se) bracket(ci) sortvar(participants initialselected ) nodepvar noaster
**** use the coefficients of b4 and b5 */

** Fixed effects?
* Same three models with kebele indicators added. The stored names b1 to b3
* are reused, and the export replaces the file written above.
local col 0
foreach y in YIELD_actual_cc YIELD_actual_is {
	local col = `col' + 1
	regress `y' initialselected i.kebele [aw=weight_ps] if random_FTC==1, cluster(kebele)
	estimates store b`col'
}
ivregress 2sls YIELD_actual_is i.kebele (participants = initialselected) [aw=weight_ps] if random_FTC==1, cluster(kebele)
estimates store b3
outreg2 [b1 b2 b3] using myfile, auto(3) bdec(3) sdec(3) excel replace stats(coef se ci) paren(se) bracket(ci) sortvar(participants initialselected) nodepvar noaster

**** In the paper we say that the results are robust to the wild bootstrapping method.
**** The commented-out commands below show how to calculate it.
/* tab kebele if random_FTC==1, gen (K_)
cgmwildboot YIELD_actual_cc initialselected K_2-K_19 if random_FTC==1, cluster(kebele) bootcluster(kebele) seed(123) reps(400)
estimates store b4
cgmwildboot YIELD_actual_is initialselected K_2-K_19 if random_FTC==1, cluster(kebele) bootcluster(kebele) seed(123) reps(400)
estimates store b5
outreg2 [ b4 b5 ] using myfile, auto(3)  bdec(3) sdec(3) excel replace stats(coef se ci) paren(se) bracket(ci) sortvar(participants initialselected ) nodepvar noaster
**** use the coefficients of b4 and b5 */

********************************
** TABLE 5: ROBUSTNESS CHECKS **
********************************
* INVERSE PROBABILITY WEIGHTING
** 1) Attrition
* For each data source (crop cut first, then self-reported output):
*   selection_X = 1 - attrition_X
*   prop_X      = prediction from a probit of selection_X on participants
*                 and household covariates, fitted on random_FTC==1
*   weight_X    = 1 / prop_X
local attr_covars participants head_age head_write hh_size hh_otherincome ///
	hh_iqubmember hh_mobilephone hh_assets_totalvalue_ln ///
	hh_agriassets_totalvalue_ln hh_landowned_ln
foreach src in cropcut selfreported {
	generate selection_`src' = 1 - attrition_`src'
	probit selection_`src' `attr_covars' if random_FTC==1, cluster(kebele)
	predict prop_`src'
	generate weight_`src' = 1/prop_`src'
}

* Same three models as in Table 4, now weighted by the attrition weights.
regress YIELD_actual_cc initialselected [aw=weight_cropcut] if random_FTC==1, cluster(kebele)
estimates store b1
regress YIELD_actual_is initialselected [aw=weight_selfreported] if random_FTC==1, cluster(kebele)
estimates store b2
* The weight type is left to the default of the command on this line ([w=...]).
ivregress 2sls YIELD_actual_is (participants = initialselected) [w=weight_selfreported] if random_FTC==1, cluster(kebele)
estimates store b3
outreg2 [b1 b2 b3] using myfile, auto(3) bdec(3) sdec(3) excel replace stats(coef se ci) paren(se) bracket(ci) sortvar(participants initialselected) nodepvar noaster

** 2) trimmed data
* For each yield measure, build a copy (suffix _trim) that is defined only
* for random_FTC==1 and is set to missing at or below the 1st percentile
* and at or above the 99th percentile of that sample.
*
* The percentiles are copied into scalars right after summarize, so they no
* longer depend on r() surviving the commands in between. The trimming
* condition is parenthesised explicitly: without parentheses & binds
* tighter than |, so the sample restriction applied to the upper tail only.
tempname p_lo p_hi
foreach y in YIELD_actual_cc YIELD_actual_is {
	quietly summarize `y' if random_FTC==1, detail
	scalar `p_lo' = r(p1)
	scalar `p_hi' = r(p99)
	gen `y'_trim = `y' if random_FTC==1
	replace `y'_trim = . if (`y' <= `p_lo' | `y' >= `p_hi') & random_FTC==1
}

* NOTE(review): the bootstrap prefix is called without its own cluster()
* option; cluster(kebele) is passed to the inner regression only. Kept as is.
bootstrap, reps(400) seed(123): reg YIELD_actual_cc_trim initialselected if random_FTC==1, cluster(kebele)
estimates store b1
bootstrap, reps(400) seed(123): reg YIELD_actual_is_trim initialselected if random_FTC==1, cluster(kebele)
estimates store b2
ivregress 2sls YIELD_actual_is_trim (participants= initialselected) if random_FTC==1, cluster(kebele)
estimates store b3
outreg2 [ b1 b2 b3] using myfile, auto(3)  bdec(3) sdec(3) excel replace stats(coef se ci) paren(se) bracket(ci) sortvar(participants initialselected ) nodepvar noaster

** 3) effect of unbalancedness
* The Table 4 models are re-estimated with three additional controls
* (listed in imbalance_ctrls) and the same weight_ps weights.
local imbalance_ctrls head_gender plot_urea_rel plot_herb
local col 0
foreach y in YIELD_actual_cc YIELD_actual_is {
	local col = `col' + 1
	regress `y' initialselected `imbalance_ctrls' [aw=weight_ps] if random_FTC==1, cluster(kebele)
	estimates store b`col'
}
* The weight type is left to the default of the command on this line ([w=...]).
ivregress 2sls YIELD_actual_is (participants = initialselected) `imbalance_ctrls' [w=weight_ps] if random_FTC==1, cluster(kebele)
estimates store b3
* This export reports coefficients and standard errors only, with significance stars.
outreg2 [b1 b2 b3] using myfile, auto(3) bdec(3) sdec(3) excel replace stats(coef se) paren(se) sortvar(participants initialselected) nodepvar

*** Lee (2009) bounds
* leebounds is run once per yield measure, with participants as the
* treatment variable and the matching selection indicator in select().
leebounds YIELD_actual_is participants if random_FTC==1, cie select(selection_selfreported)
leebounds YIELD_actual_cc participants if random_FTC==1, cie select(selection_cropcut)

* calculation of p0
* Each group of three lines prints two ratios and then the relative
* difference between them, using the rounded values of those two ratios.
display 249/342
display 154/195
display (.72807018 - .78974359)/.72807018

display 193/195
display 338/342
display (.98830409 - .98974359)/.98830409

* APPENDIX
** What explains compliance and non-attrition?
*** For the two attrition models, USE participants (actual takeup) rather than
*** initialselected (assignment), because it is the actual uptake that
*** determines whether there is attrition or not.
local hh_covars head_age head_write hh_size hh_otherincome hh_iqubmember ///
	hh_mobilephone hh_assets_totalvalue_ln hh_agriassets_totalvalue_ln hh_landowned_ln

* b1: compliance explained by assignment
probit complier initialselected `hh_covars' if random_FTC==1, cluster(kebele)
estimates store b1

* b2: non-attrition in the crop-cut data explained by takeup
probit selection_cropcut participants `hh_covars' if random_FTC==1, cluster(kebele)
estimates store b2

* b3: non-attrition in the self-reported data explained by takeup
probit selection_selfreported participants `hh_covars' if random_FTC==1, cluster(kebele)
estimates store b3

outreg2 [b1 b2 b3] using myfile, auto(3) bdec(3) sdec(3) excel replace stats(coef se) paren(se)

****************************
*** HETEROGENOUS EFFECTS ***
****************************

** TABLE 7: NON PARAMETRIC TEST CRUMP(2008)
test_condate YIELD_actual_is head_age head_write hh_size hh_distance_agc hh_tvrad if random_FTC==1, tvar(initialselected)
test_condate YIELD_actual_cc head_age head_write hh_size hh_distance_agc hh_tvrad if random_FTC==1, tvar(initialselected)

** TABLE 8: HETEROGENEOUS TREATM. EFFECTS
* interaction regressions
* The loop creates one interaction with initialselected per covariate and
* appends the loop counter to the name: head_age_i1, head_write_i2,
* hh_size_i3, hh_distance_agc_i4 and hh_tvrad_i5.
local number 1
foreach var of varlist head_age head_write hh_size hh_distance_agc hh_tvrad {
	gen `var'_i`number'=`var'*initialselected
	local number=`number'+1
}

* The regressions use the full interaction names. The earlier spellings
* (head_age_i, hh_size_i and so on) only resolved through variable
* abbreviation, which fails with varabbrev off or as soon as another
* variable shares the prefix.
local het_rhs initialselected head_age head_age_i1 head_write head_write_i2 ///
	hh_size hh_size_i3 hh_distance_agc hh_distance_agc_i4 hh_tvrad hh_tvrad_i5 i.kebele

reg YIELD_actual_cc `het_rhs' if random_FTC==1, cluster(kebele)
* Joint test that all five interaction terms are zero
test head_age_i1= head_write_i2= hh_size_i3=hh_distance_agc_i4=hh_tvrad_i5=0
estimates store b1
reg YIELD_actual_is `het_rhs' if random_FTC==1, cluster(kebele)
test head_age_i1= head_write_i2= hh_size_i3=hh_distance_agc_i4=hh_tvrad_i5=0
estimates store b2
outreg2 [ b1 b2 ] using myfile, auto(3)  bdec(3) sdec(3) excel replace stats(coef se) paren(se) sortvar( initialselected ) nodepvar

* adj pvalues
* parmby writes the parameter data set of each regression to parmfile.
* Previously both regressions were written to that file before anything
* read it, and multproc was then run on the data in memory, not on the
* saved parameters. The adjusted p-values are now computed per outcome,
* directly on the parameter file just written, and the farm-level data are
* restored afterwards.
* NOTE(review): this relies on parmby leaving the data in memory untouched
* when only saving() is given - confirm against the installed version.
local parmfile "C:\Users\n12073\Dropbox\farmer selection files\STATA\DATA MERGED\myparms.dta"
foreach y in YIELD_actual_cc YIELD_actual_is {
	parmby "reg `y' `het_rhs' if random_FTC==1, cluster(kebele)", saving("`parmfile'", replace)
	preserve
	use "`parmfile'", clear
	multproc, puncor(0.1) pvalue( p) method(simes) reject(test)
	multproc, puncor(0.1) pvalue( p) method(bonferroni)
	multproc, puncor(0.1) pvalue( p) method(yekutieli)
	multproc, puncor(0.1) pvalue( p) method(krieger)
	restore
}

* visualization of heterogeneous effects
* Simultaneous quantile regression at the nine deciles, then a plot of the
* initialselected coefficient across quantiles with the OLS estimate overlaid.
sqreg YIELD_actual_is initialselected if random_FTC==1, quantile(.1 .2 .3 .4 .5 .6 .7 .8 .9) rep(20)
grqreg initialselected, ci ols olsci reps(20)

*****************************************
*** TABLE 9: ALTERNATIVE EXPLANATIONS ***
*****************************************

*** 1 channel 1: TOO LATE INPUTS
*gen inputs_delay_i=initialselected*inputs_delay
gen inputs_delay_rains_i=inputs_delay_rains*initialselected
*** 2 channel 2: Management by farmer
gen seed_management=seed_diff_peer
gen seed_management_i=seed_management*initialselected
*** 3: Strength of FTC
gen extension=YIELD_FTC_diff_a3*FTC_years
gen extension_i=extension*initialselected

* Right-hand side shared by both regressions: the three channel variables
* with their interactions, plus the Table 8 covariates and interactions.
* The Table 8 interactions are referred to by their full names
* (head_age_i1 ... hh_tvrad_i5); the earlier short spellings only resolved
* through variable abbreviation.
local alt_rhs initialselected inputs_delay_rains inputs_delay_rains_i ///
	seed_management seed_management_i extension extension_i ///
	head_age head_age_i1 head_write head_write_i2 hh_size hh_size_i3 ///
	hh_distance_agc hh_distance_agc_i4 hh_tvrad hh_tvrad_i5 i.kebele

reg YIELD_actual_cc `alt_rhs' if random_FTC==1 , cluster(kebele)
* Joint test that all eight interaction terms are zero
test head_age_i1= head_write_i2= hh_size_i3=hh_distance_agc_i4=hh_tvrad_i5=inputs_delay_rains_i=seed_management_i=extension_i=0
estimates store b1
reg YIELD_actual_is `alt_rhs' if random_FTC==1 , cluster(kebele)
test head_age_i1= head_write_i2= hh_size_i3=hh_distance_agc_i4=hh_tvrad_i5=inputs_delay_rains_i=seed_management_i=extension_i=0
estimates store b2
outreg2 [ b1 b2 ] using myfile, auto(3)  bdec(3) sdec(3) excel replace stats(coef se) paren(se) sortvar( initialselected ) nodepvar

* adj pvalues
* parmby writes the parameter data set of each regression to parmfile.
* Previously both regressions were written to that file before anything
* read it, and multproc was then run on the data in memory, not on the
* saved parameters. The Simes procedure is now applied per outcome, at the
* 10, 5 and 1 percent levels, on the parameter file just written, and the
* farm-level data are restored afterwards.
* NOTE(review): this relies on parmby leaving the data in memory untouched
* when only saving() is given - confirm against the installed version.
local parmfile "C:\Users\n12073\Dropbox\farmer selection files\STATA\DATA MERGED\myparms.dta"
foreach y in YIELD_actual_cc YIELD_actual_is {
	parmby "reg `y' `alt_rhs' if random_FTC==1 , cluster(kebele)", saving("`parmfile'", replace)
	preserve
	use "`parmfile'", clear
	multproc, puncor(0.1) pvalue( p) method(simes) reject(test)
	multproc, puncor(0.05) pvalue( p) method(simes) reject(test2)
	multproc, puncor(0.01) pvalue( p) method(simes) reject(test3)
	restore
}


***************************************
** FIGURE 2: ROW PLANTING ATTRIBUTES **
***************************************
* Variable labels double as the x-axis titles of the four panels.
label variable seed_rate "seed rate (kg/ha)"
label variable rowplanting_space "space (cm) between rows"
label variable rowplanting_g_row "seed (gram) per row"
label variable rowplanting_rownr_m2 "rows per square meter (number)"

* One local-polynomial fit with confidence band per attribute, restricted to
* attribute values below 60. Each panel is kept in memory under the names
* graph1 to graph4 without being drawn, and the panels are combined below.
local panel 0
foreach x of varlist seed_rate rowplanting_space rowplanting_g_row rowplanting_rownr_m2 {
	local panel = `panel' + 1
	twoway (lpolyci YIELD_actual_is `x' if `x'<60, lp(solid) lwidth(medium)), ///
		ylabel(,angle(360) nogrid) ///
		xtitle(`"`: variable label `x''"') ///
		ytitle("teff yield (kg/ha)") ///
		graphregion(color(white) lcolor(white)) ///
		legend(off) name(graph`panel', replace) aspect(0.5) nodraw
}

* Arrange the four panels in a 2 x 2 grid.
graph combine graph1 graph2 graph3 graph4, r(2) c(2) iscale(0.7) graphregion(color(white) lcolor(white))

********************************
** TABLE 6: ATT at FTC  level **
********************************
* Switch to the FTC-level data set; the farm-level data in memory are discarded.
use  "C:\Users\n12073\Dropbox\farmer selection files\Paper_yield\EDCC\Second resubmission 02.2018\data sharing\rowplanting_FTClevel.dta", clear

* Indicator equal to 1 when sf2 is 1 and 0 for every other value of sf2,
* missing values included.
generate teff_quncho = (sf2 == 1)

* Two regressions that are displayed but not stored or exported.
regress output_ton_ha treatment_rp, cluster(kebele)
generate interaction = FTC_DAprob * treatment_rp
* NOTE(review): the main effect here is treatment_rptr while the interaction
* above is built from treatment_rp - confirm that both names are intended.
regress output_ton_ha treatment_rptr FTC_DAprob interaction, cluster(kebele)

* Exported models: (b2) no controls, (b3) kebele indicators,
* (b4) kebele indicators plus the inputs listed in ftc_inputs.
local ftc_inputs Dap_kg_ha UREA_kg_ha seed_kg_ha teff_quncho weeding_no
regress output_ton_ha participants_ftc, cluster(kebele)
estimates store b2
regress output_ton_ha participants_ftc i.kebele, cluster(kebele)
estimates store b3
regress output_ton_ha participants_ftc `ftc_inputs' i.kebele, cluster(kebele)
estimates store b4
* NOTE(review): sortvar() names participants and initialselected, while the
* regressor in these models is participants_ftc - confirm.
outreg2 [b2 b3 b4] using myfile, auto(3) bdec(3) sdec(3) excel replace stats(coef se ci) paren(se) bracket(ci) sortvar(participants initialselected) nodepvar
